#!/usr/bin/env bash
# Stage 1: LoRA supervised fine-tuning.
#
# Runs train.py through `accelerate launch` with the launcher configuration in
# configs/deepspeed_config.yaml. Every flag after `train.py` is forwarded to
# that script; how each one is interpreted is defined in train.py (not visible
# from here). Flag groups, in order:
#   - run mode / seed : --do_train, --do_eval, --do_test, --seed 100
#   - model and data  : base checkpoint under ../pretrain/, ./dataset.json,
#                       "qwen" chat template, splits train,validation,test,
#                       --max_seq_len 2048
#   - logging / saving: log every 3 steps, evaluate per epoch, save every
#                       200 steps, no hub push, --report_to none
#   - optimisation    : bf16, lr 1e-4, cosine_with_restarts schedule,
#                       weight decay 1e-4, warmup ratio 0.05, grad-norm clip 1.0
#   - batching        : 4 per device (train and eval), 4 accumulation steps,
#                       gradient checkpointing with --use_reentrant False
#   - LoRA            : r=8, alpha=16, dropout 0.05, target "all-linear",
#                       4-bit quantization disabled
#
# NOTE(review): --output_dir is named "mistral-sft-lora-deepspeed" although the
# base checkpoint is Qwen2.5-7B-Instruct. The name is left untouched because the
# evaluation step later in this script passes the same path as --model_path;
# rename both together if it is ever changed.
#
# If training exits non-zero the script stops here with the same status, so the
# evaluation step is never run against a missing or partially written model
# directory.
accelerate launch --config_file "configs/deepspeed_config.yaml" train.py \
  --do_train True \
  --do_eval True \
  --do_test True \
  --seed 100 \
  --model_name_or_path "../pretrain/Qwen2.5-7B-Instruct/Qwen2.5-7B-Instruct" \
  --dataset_name "./dataset.json" \
  --chat_template_format "qwen" \
  --add_special_tokens False \
  --append_concat_token False \
  --splits "train,validation,test" \
  --max_seq_len 2048 \
  --num_train_epochs 2 \
  --logging_steps 3 \
  --log_level "info" \
  --logging_strategy "steps" \
  --eval_strategy "epoch" \
  --save_strategy "steps" \
  --save_steps 200 \
  --push_to_hub false \
  --hub_private_repo True \
  --bf16 True \
  --packing False \
  --learning_rate 1e-4 \
  --lr_scheduler_type "cosine_with_restarts" \
  --weight_decay 1e-4 \
  --warmup_ratio 0.05 \
  --max_grad_norm 1.0 \
  --output_dir "mistral-sft-lora-deepspeed" \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 4 \
  --gradient_checkpointing True \
  --use_reentrant False \
  --dataset_text_field "content" \
  --use_flash_attn True \
  --use_peft_lora True \
  --lora_r 8 \
  --lora_alpha 16 \
  --lora_dropout 0.05 \
  --lora_target_modules "all-linear" \
  --use_4bit_quantization False \
  --report_to none || {
  # Capture the status first: any later command would overwrite $?.
  status=$?
  printf 'error: training failed with exit status %s; aborting.\n' "$status" >&2
  exit "$status"
}

# Stage 2: after training, run a separate evaluation on the test set.
# --model_path is the same directory that the training command above was given
# as --output_dir, and --dataset_path is the same ./dataset.json file.
printf '%s\n' "在测试集上进行评估..."
python evaluate_model.py \
  --model_path 'mistral-sft-lora-deepspeed' \
  --dataset_path './dataset.json' \
  --output_dir './eval_results' \
  --max_length 512 \
  --max_new_tokens 256 \
  --batch_size 4 \
  --temperature 0.7 \
  --top_p 0.9 \
  --chat_template_format 'qwen' \
  --use_test_set